{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# series的创建\n",
    "import pandas as pd\n",
    "s = pd.Series([10,2,3,4,5])\n",
    "# 自定义索引\n",
    "s = pd.Series([10,2,3,4,5], index=['A', 'B', 'C', 'D', 'E'])\n",
    "# s = pd.Series([10,2,3,4,5], index=[1,2,3,4,5])\n",
    "# 定义name\n",
    "s = pd.Series([10,2,3,4,5], index=['A', 'B', 'C', 'D', 'E'], name = '月份')\n",
    "print(s)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 通过字典来创建\n",
    "s = pd.Series({\"a\":1,\"b\":2,\"c\":3,\"d\":4,\"e\":5})\n",
    "# print(s)\n",
    "s2 = pd.Series([10,2,3,4,5], index=['A', 'B', 'C', 'D', 'E'], name = '月份')\n",
    "s1 = pd.Series(s2,index=[\"A\",\"C\"])\n",
    "print(s1)"
   ],
   "id": "18dc6b74de42d5f3",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# series的属性\n",
    "'''\n",
    "index:Series的索引对象\n",
    "values:Series的值\n",
    "dtype或dtypes\"Series的元素类型\n",
    "shape:Series的形状\n",
    "ndim:Series的维度\n",
    "size:Series的元素个数\n",
    "name:Series的名称\n",
    "loc[]  显式索引，按标签索引或切片\n",
    "iloc[]  隐式索引，按位置索引或切片\n",
    "at[]  使用标签访问单个元素\n",
    "iat[]  使用位置访问单个元素\n",
    "'''\n",
    "# print(s.index)\n",
    "# print(s.values)\n",
    "# print(s.shape,s.ndim,s.size)\n",
    "# s.name = 'test'\n",
    "# print(s.dtype,s.name)\n",
    "print(s.loc['a']) #显式索引\n",
    "print(s.iloc[0])  #隐式索引\n",
    "print(s.at['a'])\n",
    "print(s.iat[0])"
   ],
   "id": "96af6082ec704c4b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 访问数据\n",
    "# print(s[1])\n",
    "# print(s['c'])\n",
    "# print(s)\n",
    "# print(s[s<3])\n",
    "s['f']=6\n",
    "print(s.head(2))\n",
    "print(s.tail(1))"
   ],
   "id": "c7b39662878077c4",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 常见函数\n",
    "s = pd.Series([10,2,np.nan,None,3,4,5], index=['A', 'B', 'C', 'D', 'E','F','G'], name= 'data')\n",
    "print(s)"
   ],
   "id": "81f4ff0637802e99",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "s.head(3)  # 默认取前5行的数据\n",
    "s.tail(2)   #默认取后5行的数据"
   ],
   "id": "62aa0810c4a358e6",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 查看所有的描述性信息\n",
    "s.describe()"
   ],
   "id": "d716a15eaf24a33b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 获取元素个数(忽略缺失值）\n",
    "print(s.count())"
   ],
   "id": "4c86810f9940555f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 获取索引\n",
    "print(s.keys())   # 方法\n",
    "print(s.index)   # 属性"
   ],
   "id": "731e8ed9d1505c3f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "print(s.isna())  #检查Series里的每一个元素是否为缺失值\n",
    "s.isna()"
   ],
   "id": "a3b98aa962570f9d",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "s.isin([4,5,6])  # 检查每个元素是否在参数集合中",
   "id": "6bc1b3ccfbf2785c",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "s.describe()",
   "id": "a7c7eb85736467a5",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "print(s.mean())  #平均值\n",
    "print(s.sum())   #总和\n",
    "print(s.std())   #标准差\n",
    "print(s.var())   #方差\n",
    "print(s.min()) #最小值\n",
    "print(s.max())  #最大值\n",
    "print(s.median())  #中位数"
   ],
   "id": "a6704c7f90d8583a",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "print(s)",
   "id": "aebcf7bc1512bb6",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# print(s.sort_values())\n",
    "print(s.quantile(0.8)) #分位数\n",
    "#————————————————\n",
    "#2  3   4  5   10\n",
    "#位置 4*0.8=3.2\n",
    "#值的计算  5 + （10-5）*0.2 = 6"
   ],
   "id": "6733c71b4b72594f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "#众数\n",
    "s['H']=4\n",
    "print(s.mode())"
   ],
   "id": "ee2dc2a67d792c65",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "print(s.value_counts())  # 每个元素的计数",
   "id": "c6872e9718e78a58",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "s.drop_duplicates()  #去重\n",
    "s.unique()\n",
    "print(s.nunique()) #去重后的元素个数"
   ],
   "id": "81e30c05059a56b0",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 排序  值、索引\n",
    "s.sort_index()  # 按索引排序\n",
    "s.sort_values()  #按值排序"
   ],
   "id": "5079c0367697814f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''创建一个包含10名学生数学成绩的Series，成绩范围在50-100之间。\n",
    "计算平均分、最高分、最低分，并找出高于平均分的学生人数。'''\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "np.random.seed(42)\n",
    "values = np.random.randint(50,101,10)\n",
    "indexes = []\n",
    "for i in range(1,11):\n",
    "    indexes.append('学生'+str(i))\n",
    "scores = pd.Series(values,indexes)\n",
    "# print(scores)\n",
    "print('平均分：',scores.mean())\n",
    "print('最高分：',scores.max())\n",
    "print('最低分：',scores.min())\n",
    "# 高于平均分的学生人数\n",
    "mean = scores.mean()\n",
    "print('高于平均分的学生人数:',len(scores[scores>mean]))\n",
    "print('高于平均分的学生人数:',scores[scores>mean].count())"
   ],
   "id": "821c1619b3f2e43d",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''温度数据统计\n",
    "给定某城市一周每天的最高温度Series，完成以下任务：\n",
    "找出温度超过30度的天数\n",
    "计算平均温度\n",
    "将温度从高到低排序\n",
    "找出温度变化最大的两天\n",
    "'''\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "temperatures = pd.Series([28, 31, 29, 32, 30, 27, 33],\n",
    "                         index=['周一', '周二', '周三', '周四', '周五', '周六', '周日'])"
   ],
   "id": "acc34c120e244526",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 找出温度超过30度的天数\n",
    "n = temperatures[temperatures>30].count()\n",
    "print('超过30度的天数：',n)"
   ],
   "id": "c729173b291f3b8b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 计算平均温度\n",
    "print('平均温度：',temperatures.mean())"
   ],
   "id": "d6d688d6c46debcb",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 将温度从高到低排序\n",
    "t2 = temperatures.sort_values(ascending=False)\n",
    "print('从高到低排序：',t2)"
   ],
   "id": "b10f2bcea7559450",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 找出温度变化最大的两天\n",
    "# 28 31 29 32 30 27 33\n",
    "# none 3 -2 3 -2 -3 6\n",
    "t3 = temperatures.diff().abs()   #计算series的变化值\n",
    "\n",
    "print('温度变化最大的两天',*(t3.sort_values(ascending=False).keys()[:2].tolist()))"
   ],
   "id": "4c39ee895446a466",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''\n",
    "股票价格分析\n",
    "给定某股票连续10个交易日的收盘价Series：\n",
    "计算每日收益率（当日收盘价/前日收盘价 - 1）\n",
    "找出收益率最高和最低的日期\n",
    "计算波动率（收益率的标准差）\n",
    "\n",
    "\n",
    "prices = pd.Series([102.3, 103.5, 105.1, 104.8, 106.2, 107.0, 106.5, 108.1, 109.3, 110.2], index=pd.date_range('2023-01-01', periods=10))\n",
    "'''"
   ],
   "id": "e04a283be264fea8",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# 日期序列\n",
    "date = pd.date_range('2000-06-1',periods=60)\n",
    "print(list(date))"
   ],
   "id": "61888e31689123a4",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "prices = pd.Series([102.3, 103.5, 105.1, 104.8, 106.2, 107.0, 106.5, 108.1, 109.3, 110.2], index=pd.date_range('2023-01-01', periods=10))",
   "id": "41bbfa0725545ff3",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "prices",
   "id": "88f1c55daebe7949",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''计算每日收益率（当日收盘价/前日收盘价 - 1）\n",
    "找出收益率最高和最低的日期\n",
    "计算波动率（收益率的标准差）'''\n",
    "# 计算每日收益率\n",
    "a = prices.pct_change()  #percent  103.5/102.3 - 1"
   ],
   "id": "c008307da17b15e2",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 收益率最高的日期\n",
    "print(a.idxmax())\n",
    "# 收益率最低的日期\n",
    "print(a.idxmin())"
   ],
   "id": "eac2c5c2b026730d",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 波动率\n",
    "print(a.std())"
   ],
   "id": "469ad2e6e97ad33",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''销售数据分析\n",
    "某产品过去12个月的销售量Series：\n",
    "计算季度平均销量（每3个月为一个季度）\n",
    "找出销量最高的月份\n",
    "计算月环比增长率\n",
    "找出连续增长超过2个月的月份\n",
    "\n",
    "sales = pd.Series([120, 135, 145, 160, 155, 170, 180, 175, 190, 200, 210, 220],index=pd.date_range('2022-01-01', periods=12, freq='MS'))'''"
   ],
   "id": "7f1feaf83ec3180f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "a = pd.date_range('2022-01-01', periods=12, freq='MS')",
   "id": "a75916878370374b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "sales = pd.Series([120, 135, 145, 160, 155, 170, 180, 175, 190, 200, 210, 220],index=pd.date_range('2022-01-01', periods=12, freq='MS'))",
   "id": "f07ee342246791db",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "sales",
   "id": "4c924e73f338505c",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 季度的平均销量\n",
    "# (120+135+145)/3 = 400/3\n",
    "sales.resample('QS').mean()  #重新采样"
   ],
   "id": "d4934c48903f1a85",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "print('销量最高的月份',sales.idxmax())",
   "id": "15a3cbe7a7835a86",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "print('月环比的增长率')\n",
    "sales.pct_change()"
   ],
   "id": "de09b3f163973d1b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# 找出连续增长超过2个月的月份\n",
    "sales"
   ],
   "id": "5843d4dacde5e9ac",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "a = sales.pct_change()\n",
    "b=a>0\n",
    "b[b.rolling(3).sum()==3].keys().tolist()"
   ],
   "id": "5934b2b25c95a673",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "'''每小时销售数据分析\n",
    "某商店每小时销售额Series：\n",
    "按天重采样计算每日总销售额\n",
    "计算每天营业时间（8:00-22:00）和非营业时间的销售额比例\n",
    "找出销售额最高的3个小时'''"
   ],
   "id": "dffa5987cdfb2111",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "np.random.seed(42)\n",
    "h = pd.Series(np.random.randint(0,100,24),\n",
    "          index=pd.date_range('2025-01-01',periods=24,freq='h'))\n",
    "# 按天重采样计算每日总销售额\n",
    "day_sales = h.resample('D').sum()\n",
    "# hours_sales.sum()\n",
    "# 计算每天营业时间（8:00-22:00）和非营业时间的销售额比例\n",
    "mask =(h.index.hour>=8) & ((h.index.hour<=22))\n",
    "b = h[mask]\n",
    "n_b = h[~mask]\n",
    "print(b.sum()/n_b.sum())\n",
    "# 找出销售额最高的3个小时\n",
    "print(h.nlargest(3).keys())"
   ],
   "id": "1d51458c58a30e16",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
